View Javadoc

1   
2   /*
3    * SmartCrawler
4    *
5    * $Id: SmartGetMethod.java,v 1.2 2005/08/05 15:55:53 vincool Exp $
6    * Copyright 2005 Davide Pozza
7    *
8    * This program is free software; you can redistribute it
9    * and/or modify it under the terms of the GNU General Public
10   * License as published by the Free Software Foundation;
11   * either version 2 of the License, or (at your option) any
12   * later version.
13   *
14   * This program is distributed in the hope that it will be
15   * useful, but WITHOUT ANY WARRANTY; without even the implied
16   * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
17   * PURPOSE. See the GNU General Public License for more
18   * details.
19   *
20   * You should have received a copy of the GNU General Public
21   * License along with this program; if not, write to the Free
22   * Software Foundation, Inc., 59 Temple Place, Suite 330,
23   * Boston, MA 02111-1307 USA
24   *
25   */
26  
27  package org.smartcrawler.retriever;
28  
29  import java.io.IOException;
30  import java.util.zip.GZIPInputStream;
31  
32  import org.apache.commons.httpclient.*;
33  import org.apache.commons.httpclient.methods.GetMethod;
34  /***
35   *
36   *
37   * @author <a href="mailto:pozzad@alice.it">Davide Pozza</a>
38   * @version <tt>$Revision: 1.2 $</tt>
39   */
40  
41  public class SmartGetMethod extends GetMethod {
42  
43      public SmartGetMethod() {
44          super();
45      }
46  
47      /***
48       * Constructor specifying a URI.
49       *
50       * @param uri either an absolute or relative URI
51       *
52       * @since 1.0
53       */
54      public SmartGetMethod(String uri) {
55          super(uri);
56      }
57  
58  
59      /***
60       * Overrides method in {@link HttpMethodBase}.
61       *
62       * Notifies the server that we can process a GZIP-compressed response before
63       * sending the request.
64       *
65       */
66      public int execute(HttpState state, HttpConnection conn)
67      throws HttpException, HttpRecoverableException, IOException {
68          //System.out.println("OVERRIDING execute");
69          // Tell the server that we can handle GZIP-compressed data in the response body
70          addRequestHeader("Accept-Encoding", "gzip");
71          addRequestHeader("Accept", "text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5");
72  
73          return super.execute(state, conn);
74      }
75  
76      /***
77       * Overrides method in {@link GetMethod} to set the responseStream variable appropriately.
78       *
79       * If the response body was GZIP-compressed, responseStream will be set to a GZIPInputStream
80       * wrapping the original InputStream used by the superclass.
81       *
82       */
83      protected void readResponseBody(HttpState state, HttpConnection conn) throws IOException, HttpException {
84          super.readResponseBody(state, conn);
85          //System.out.println("OVERRIDING readResponseBody");
86          Header contentEncodingHeader = getResponseHeader("Content-Encoding");
87  
88          if (contentEncodingHeader != null && contentEncodingHeader.getValue().equalsIgnoreCase("gzip"))
89              setResponseStream(new GZIPInputStream(getResponseStream()));
90      }
91  
92  }